This script wrangles actigraphy data for subsequent analyses.

# libraries

library(tidyverse)
library(naniar)
library(lubridate)
library(lmerTest)

1 reading in databases

## # A tibble: 1 x 1
##   start_date_max
##   <date>        
## 1 2021-04-25
## # A tibble: 1 x 1
##   start_date_max
##   <date>        
## 1 2020-08-20
## # A tibble: 1 x 1
##   start_date_max
##   <date>        
## 1 2020-08-14

2 cleaning databases

Filter for sleep intervals only, and select the sleep variables of interest (efficiency, sleep time, onset latency, etc.).

# Print numbers in fixed notation rather than scientific notation.
options(scipen = 999)
# From Reed & Sacco
# DSE <- onset_latency + TST + WASO + TASAFA
# SE = (sleep_time/DSE)*100

# Names of the actigraphy variables of interest. Each name appears exactly
# once ("sleep_time" was previously listed twice).
d_vars <- c(
  "data_start_date",
  "onset_latency",
  "snooze_time",
  "efficiency",
  "duration",
  "sleep_time",
  "waso",
  "wake_time",
  "percent_wake",
  "number_of_wake_bouts",
  "avg_wake_b",
  "number_of_sleep_bouts"
)

# ---- shared helpers for cleaning the raw actigraphy exports ----
# Every database goes through the same steps: label the timepoint from the
# subject id, reduce the id to its numeric part, keep valid SLEEP intervals,
# and add sleep time in hours. Only the timepoint patterns, the id suffixes,
# and (for d4) an id correction differ between databases.

# Columns kept from every export, in output order.
actig_keep_cols <- c(
  "ELS_ID",
  "timepoint",
  "data_start_date",
  "data_start_time",
  "interval_type",
  "interval_number",
  "start_date",
  "start_time",
  "end_date",
  "end_time",
  "duration",
  "onset_latency",
  "snooze_time",
  "efficiency",
  "waso",
  "wake_time",
  "percent_wake",
  "number_of_wake_bouts",
  "avg_wake_b",
  "sleep_time",
  "percent_sleep",
  "number_of_sleep_bouts",
  "avg_sleep_b",
  "inv_time_sw"
)

# Derive a timepoint label from raw subject ids.
# `patterns` is a named character vector: names are the labels, values are the
# regexes searched for in `id`. Patterns are applied in order, so a later match
# overrides an earlier one. Ids that match nothing (or are NA) get NA.
label_timepoint <- function(id, patterns) {
  timepoint <- rep(NA_character_, length(id))
  for (label in names(patterns)) {
    timepoint <- ifelse(str_detect(id, patterns[[label]]), label, timepoint)
  }
  timepoint
}

# Remove the first occurrence of each suffix pattern, in the order given.
strip_id_suffixes <- function(id, suffixes) {
  for (suffix in suffixes) {
    id <- str_remove(id, suffix)
  }
  id
}

# Step 1: rename subject_id to ELS_ID (as character) and add `timepoint`
# directly after it.
add_timepoint <- function(raw, patterns = c(T3 = "T3")) {
  raw %>%
    rename(ELS_ID = subject_id) %>%
    mutate(
      ELS_ID = as.character(ELS_ID),
      timepoint = label_timepoint(ELS_ID, patterns)
    ) %>%
    relocate(timepoint, .after = ELS_ID)
}

# Step 2: reduce ELS_ID to a numeric factor, keep SLEEP intervals that have an
# interval number and inv_time_sw == 0, keep the shared column set, and add
# sleep_time_hrs (sleep_time / 60).
# Ids that are still non-numeric after stripping become NA (as.numeric warns).
keep_valid_sleep_intervals <- function(labelled, id_suffixes = "-T3") {
  labelled %>%
    mutate(
      ELS_ID = factor(as.numeric(strip_id_suffixes(ELS_ID, id_suffixes)))
    ) %>%
    filter(interval_type == "SLEEP") %>%
    dplyr::select(all_of(actig_keep_cols)) %>%
    filter(
      !is.na(interval_number),
      inv_time_sw == 0
    ) %>%
    mutate(
      sleep_time_hrs = sleep_time / 60
    )
}

# d1
d1_clean <-
  d1 %>%
  add_timepoint() %>%
  keep_valid_sleep_intervals()

# d2
d2_clean <-
  d2 %>%
  add_timepoint() %>%
  keep_valid_sleep_intervals()

# d3
d3_clean <-
  d3 %>%
  add_timepoint() %>%
  keep_valid_sleep_intervals()

# d4
# In this database two ids were entered as each other by accident (recorded
# under the combined id "171-T3 and 109-T3"), so they are separated here by the
# date the watch was administered.
### 109 was 6/23
### 171 was 7/8
# NOTE(review): the note above says 7/8 for 171, but the code matches a
# data_start_date of "7/7/19" -- confirm which date is correct.
d4_clean <-
  d4 %>%
  add_timepoint() %>%
  mutate(
    ELS_ID =
      case_when(
        str_detect(ELS_ID, "171-T3 and 109-T3") &
          str_detect(data_start_date, "7/7/19") ~ "171-T3",
        str_detect(ELS_ID, "171-T3 and 109-T3") &
          str_detect(data_start_date, "6/23/19") ~ "109-T3",
        TRUE ~ ELS_ID
      )
  ) %>%
  keep_valid_sleep_intervals() %>%
  filter(
    ELS_ID != "196" # using 196x in the cov df bc aligns with MW
  )

# covid df
# Ids here may carry "-T3", "x-T3" (labelled "T3x") or "-T4"; all of these,
# plus the "x" marker, are stripped before the id is made numeric.
covid_id_suffixes <- c("-T3", "-T4", "x")

cov_clean <-
  cov %>%
  add_timepoint(c(T3 = "T3", T3x = "x-T3", T4 = "T4")) %>%
  keep_valid_sleep_intervals(covid_id_suffixes)

# participants administered actigraphy during COVID-19 from database 4
d4_cov_clean <-
  d4_cov %>%
  add_timepoint(c(T3 = "T3", T4 = "T4")) %>%
  keep_valid_sleep_intervals(covid_id_suffixes)

3 extracting data start date for reduced dataframe

# One row per unique (ELS_ID, data_start_date, timepoint) combination, with the
# recording start date parsed from month/day/year text into a Date.
# Returns columns ELS_ID (re-factored), timepoint, actigraphy_start_date.
extract_recording_start <- function(clean_df) {
  clean_df %>%
    distinct(ELS_ID, data_start_date, timepoint) %>%
    mutate(
      actigraphy_start_date = mdy(data_start_date),
      ELS_ID = factor(ELS_ID)
    ) %>%
    dplyr::select(-data_start_date)
}

d1_clean_startdate_red <- extract_recording_start(d1_clean)

d2_clean_startdate_red <- extract_recording_start(d2_clean)

d3_clean_startdate_red <- extract_recording_start(d3_clean)

d4_clean_startdate_red <- extract_recording_start(d4_clean)

cov_clean_startdate_red <- extract_recording_start(cov_clean)

d4_cov_clean_startdate_red <- extract_recording_start(d4_cov_clean)

4 extracting data start date for long dataframe

# One row per unique (ELS_ID, data_start_date, start_date) combination.
# Returns columns ELS_ID (re-factored), actigraphy_start_date (parsed recording
# start) and actigraphy_day.
# NOTE(review): actigraphy_day is lubridate::day() of the interval start date,
# i.e. the day of the month (1-31), not the day number within the recording --
# confirm this is what downstream code expects.
extract_interval_days <- function(clean_df) {
  clean_df %>%
    distinct(ELS_ID, data_start_date, start_date) %>%
    mutate(
      actigraphy_start_date = mdy(data_start_date),
      actigraphy_day = day(mdy(start_date)),
      ELS_ID = factor(ELS_ID)
    ) %>%
    dplyr::select(-data_start_date, -start_date)
}

d1_clean_startdate <- extract_interval_days(d1_clean)

d2_clean_startdate <- extract_interval_days(d2_clean)

d3_clean_startdate <- extract_interval_days(d3_clean)

d4_clean_startdate <- extract_interval_days(d4_clean)

cov_clean_startdate <- extract_interval_days(cov_clean)

d4_cov_clean_startdate <- extract_interval_days(d4_cov_clean)

4.1 merging long-form dataframes

# Coerce the interval start/end clock times to character so these tables can
# be row-bound with d1_clean and d2_clean, which are bound as they are.
times_to_character <- function(df) {
  mutate(df, across(c(start_time, end_time), as.character))
}

d3_clean_time <- times_to_character(d3_clean)
d4_clean_time <- times_to_character(d4_clean)
cov_clean_time <- times_to_character(cov_clean)
d4_cov_clean_time <- times_to_character(d4_cov_clean)

# Long-form table: one row per retained sleep interval across all databases.
actig_long <- bind_rows(
  d1_clean,
  d2_clean,
  d3_clean_time,
  d4_clean_time,
  cov_clean_time,
  d4_cov_clean_time
)

# Keep intervals numbered 1-14 and parse the interval start date
# (triggerdate) and the recording start date (actstartdate) into Dates.
actig_long_2wk <-
  actig_long %>%
  filter(interval_number < 15) %>%
  mutate(
    triggerdate = mdy(start_date),
    actstartdate = mdy(data_start_date)
  )

4.1.1 creating day and week variables

# creating day and week variables
# day  = day of the month of the interval start date
# wday = abbreviated weekday label of the interval start date
actig_long_2wk_day <-
  actig_long_2wk %>%
  mutate(
    day = day(triggerdate),
    wday = wday(triggerdate, label = TRUE),
    ELS_ID = factor(ELS_ID)
  )

# day num: chronological position (1, 2, 3, ...) of each interval within a
# participant.
# row_number(triggerdate) ranks the dates (ties broken by row order). The
# previous order(triggerdate) returned the sorting permutation instead, which
# only equals the rank when rows are already in date order.
# The result is ungrouped so later steps do not silently inherit the grouping.
actig_long_2wk_dayorder <-
  actig_long_2wk_day %>%
  group_by(ELS_ID) %>%
  mutate(
    dayorder = row_number(triggerdate)
  ) %>%
  ungroup()

4.1.2 wkday or wkend

# Flag each interval as weekend ("wkend": Sat/Sun) or weekday ("wkday");
# intervals with a missing weekday stay NA.
actig_long_2wk_dayorder2 <-
  actig_long_2wk_dayorder %>%
  mutate(
    week =
      case_when(
        is.na(wday) ~ NA_character_,
        wday == "Sat" | wday == "Sun" ~ "wkend",
        TRUE ~ "wkday"
      )
  )

4.1.3 distribution of weekday vs. weekend and association with sleep eff and hrs

# Relabel the weekday/weekend flag as a factor ("weekday", "weekend") and make
# sure the model covariates are plain numerics.
level_key <- c(wkday = "weekday", wkend = "weekend")
actig_long_2wk_dayorder2 <-
  actig_long_2wk_dayorder2 %>%
  mutate(
    week = recode_factor(factor(week), !!!level_key),
    across(c(dayorder, sleep_time_hrs), as.numeric)
  )

# Violin + boxplot of nightly sleep efficiency on weekdays vs. weekends.
actig_long_2wk_dayorder2 %>%
  ggplot(
    aes(x = week, y = efficiency, fill = week)
  ) +
  geom_violin(alpha = 0.5, color = "black") +
  geom_boxplot(width = 0.1, color = "grey", alpha = 0.5) +
  scale_fill_manual(values = c("#FFA07A", "#6B8E23")) +
  theme_classic() +
  # axis label typo fixed ("Effiency" -> "Efficiency")
  labs(x = "Weekday or Weekend", y = "Sleep Efficiency")

# ggsave() without a `plot` argument saves the last plot displayed.
ggsave("wkdayvsweeknd_sleepeff.png", width = 7, height = 6)

# Mixed model: standardized efficiency on standardized day order and
# weekday/weekend, with a random intercept and random day-order slope per
# participant.
summary(lmer(scale(efficiency) ~ scale(dayorder) +factor(week) + (1 + scale(dayorder)|ELS_ID), data = actig_long_2wk_dayorder2))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## scale(efficiency) ~ scale(dayorder) + factor(week) + (1 + scale(dayorder) |  
##     ELS_ID)
##    Data: actig_long_2wk_dayorder2
## 
## REML criterion at convergence: 4777.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.2256 -0.4075  0.1553  0.5907  2.3435 
## 
## Random effects:
##  Groups   Name            Variance Std.Dev. Corr
##  ELS_ID   (Intercept)     0.35648  0.5971       
##           scale(dayorder) 0.01381  0.1175   0.05
##  Residual                 0.63570  0.7973       
## Number of obs: 1860, groups:  ELS_ID, 144
## 
## Fixed effects:
##                       Estimate Std. Error         df t value Pr(>|t|)  
## (Intercept)            0.01518    0.05479  157.27870   0.277   0.7820  
## scale(dayorder)       -0.05090    0.02271  167.15509  -2.241   0.0263 *
## factor(week)weekend   -0.06284    0.04134 1722.66288  -1.520   0.1287  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) scl(d)
## scal(dyrdr)  0.049       
## fctr(wk)wkn -0.223  0.025
# Violin + boxplot of nightly sleep duration (hours) on weekdays vs. weekends.
ggplot(
  data = actig_long_2wk_dayorder2,
  mapping = aes(x = week, y = sleep_time_hrs, fill = week)
) +
  geom_violin(color = "black", alpha = 0.5) +
  geom_boxplot(color = "grey", width = 0.1, alpha = 0.5) +
  scale_fill_manual(values = c("#FFA07A", "#6B8E23")) +
  theme_classic() +
  labs(
    x = "Weekday or Weekend",
    y = "Objective Sleep Duration"
  )

# Saves the last plot displayed.
ggsave("wkdayvsweeknd_objsleephrs.png", width = 7, height = 6)

# Same mixed model as for efficiency, with standardized sleep hours as outcome.
summary(
  lmer(
    scale(sleep_time_hrs) ~ scale(dayorder) + factor(week) + (1 + scale(dayorder)|ELS_ID),
    data = actig_long_2wk_dayorder2
  )
)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: scale(sleep_time_hrs) ~ scale(dayorder) + factor(week) + (1 +  
##     scale(dayorder) | ELS_ID)
##    Data: actig_long_2wk_dayorder2
## 
## REML criterion at convergence: 5042.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6205 -0.5237  0.0153  0.5649  4.1910 
## 
## Random effects:
##  Groups   Name            Variance Std.Dev. Corr 
##  ELS_ID   (Intercept)     0.210580 0.45889       
##           scale(dayorder) 0.008784 0.09372  -0.37
##  Residual                 0.773040 0.87923       
## Number of obs: 1860, groups:  ELS_ID, 144
## 
## Fixed effects:
##                        Estimate  Std. Error          df t value Pr(>|t|)   
## (Intercept)           -0.049484    0.045642  171.469961  -1.084  0.27981   
## scale(dayorder)        0.008872    0.023289  174.180880   0.381  0.70369   
## factor(week)weekend    0.140398    0.045342 1736.431852   3.096  0.00199 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) scl(d)
## scal(dyrdr) -0.082       
## fctr(wk)wkn -0.294  0.027
# Number of intervals each participant contributes on weekdays and weekends.
# The result stays grouped by ELS_ID (the default "drop_last" behaviour, made
# explicit here).
sleep_week <-
  actig_long_2wk_dayorder2 %>%
  group_by(ELS_ID, week) %>%
  summarize(n = n(), .groups = "drop_last")
## `summarise()` has grouped output by 'ELS_ID'. You can override using the `.groups` argument.
# Wide version: efficiency split into one column per level of `week`.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# new columns in factor-level order, as spread() did.
# NOTE(review): row order may differ from the old spread() output; nothing
# visible in this script depends on it -- confirm before relying on row order.
actig_long_2wk_dayorder2_spread <-
  actig_long_2wk_dayorder2 %>%
  pivot_wider(
    names_from = week,
    values_from = efficiency,
    names_sort = TRUE
  )


# Per-participant efficiency trajectories across days (thin lines), with a
# loess smooth per weekday/weekend group.
actig_long_2wk_dayorder2 %>%
  ggplot(
    aes(
      x = dayorder,
      y = efficiency,
      color = week
    )
  ) +
  geom_line(aes(group = ELS_ID, color = week), alpha = .3) +
  geom_smooth(method = "loess", se = FALSE, size = 2) +
  theme_classic() +
  scale_x_continuous(
    name = "Day",
    limits = c(1, 14),
    breaks = seq(1, 14, 1)
  ) +
  # Efficiency is a percentage, so the axis runs 0-100 with ticks every 10
  # (matching the daily efficiency plot in section 6.1). The previous
  # limits/breaks started at 1, which put ticks at 1, 11, 21, ... and dropped
  # any value below 1.
  scale_y_continuous(
    name = "Sleep Efficiency",
    limits = c(0, 100),
    breaks = seq(0, 100, 10)
  ) +
  scale_color_manual(values = c("#FFA07A", "#6B8E23"))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (stat_smooth).
## Warning: Removed 33 row(s) containing missing values (geom_path).

# Saves the last plot displayed.
ggsave("sleepeff_byDay_byWkdayWkend.png", width = 7, height = 6)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (stat_smooth).

## Warning: Removed 33 row(s) containing missing values (geom_path).
# Per-participant sleep-duration trajectories across days (thin lines), with a
# loess smooth per weekday/weekend group.
ggplot(
  data = actig_long_2wk_dayorder2,
  mapping = aes(x = dayorder, y = sleep_time_hrs, color = week)
) +
  # color is inherited from the plot-level mapping
  geom_line(aes(group = ELS_ID), alpha = .3) +
  geom_smooth(method = "loess", se = FALSE, size = 2) +
  theme_classic() +
  scale_x_continuous(name = "Day", limits = c(1, 14), breaks = 1:14) +
  scale_y_continuous(name = "Sleep Hours", limits = c(0, 13), breaks = 0:13) +
  scale_color_manual(values = c("#FFA07A", "#6B8E23"))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (stat_smooth).

## Warning: Removed 33 row(s) containing missing values (geom_path).

# Saves the last plot displayed.
ggsave("objsleephrs_byDay_byWkdayWkend.png", width = 7, height = 6)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (stat_smooth).

## Warning: Removed 33 row(s) containing missing values (geom_path).

4.2 merging reduced dataframes

5 reducing dataframes

taking the average of efficiency over the span of the 14 days to get a more “representative” efficiency

# Collapse a cleaned database to one row of sleep summaries per participant and
# attach the recording start date / timepoint.
#
# clean_df    : cleaned interval-level table (one of the *_clean objects)
# start_dates : matching *_clean_startdate_red table
# Returns start_dates left-joined with, per ELS_ID, the mean and sd (NAs
# removed) of efficiency, onset latency, snooze time, sleep time and sleep time
# in hours.
# NOTE(review): the summaries use every retained interval in clean_df; the
# 14-interval cut-off is only applied to the long-form table -- confirm this is
# intended.
# NOTE(review): if start_dates has several rows for one ELS_ID (several start
# dates or timepoints), the same per-participant summary is repeated on each.
summarise_sleep_by_id <- function(clean_df, start_dates) {
  per_id <-
    clean_df %>%
    mutate(
      ELS_ID = as.factor(ELS_ID)
    ) %>%
    group_by(ELS_ID) %>%
    summarize(
      eff_perc_mean = mean(efficiency, na.rm = TRUE),
      eff_perc_sd = sd(efficiency, na.rm = TRUE),
      onset_lat_mean = mean(onset_latency, na.rm = TRUE),
      onset_lat_sd = sd(onset_latency, na.rm = TRUE),
      snooze_mean = mean(snooze_time, na.rm = TRUE),
      snooze_sd = sd(snooze_time, na.rm = TRUE),
      sleep_dur_mean = mean(sleep_time, na.rm = TRUE),
      sleep_dur_sd = sd(sleep_time, na.rm = TRUE),
      sleep_time_hrs_mean = mean(sleep_time_hrs, na.rm = TRUE),
      sleep_time_hrs_sd = sd(sleep_time_hrs, na.rm = TRUE)
    )

  left_join(
    start_dates,
    per_id,
    by = "ELS_ID"
  )
}

# d1
d1_reduced <- summarise_sleep_by_id(d1_clean, d1_clean_startdate_red)

# d2
d2_reduced <- summarise_sleep_by_id(d2_clean, d2_clean_startdate_red)

# d3
d3_reduced <- summarise_sleep_by_id(d3_clean, d3_clean_startdate_red)

# d4
d4_reduced <- summarise_sleep_by_id(d4_clean, d4_clean_startdate_red)

# cov
cov_reduced <- summarise_sleep_by_id(cov_clean, cov_clean_startdate_red)

# d4 cov
d4_cov_reduced <- summarise_sleep_by_id(d4_cov_clean, d4_cov_clean_startdate_red)

# Stack the per-database summaries into one reduced table.
actig_merged <- bind_rows(
  d1_reduced,
  d2_reduced,
  d3_reduced,
  d4_reduced,
  cov_reduced,
  d4_cov_reduced
)

6 Data Viz

6.1 Day to Day sleep efficiency (software based)

# One small panel per participant: sleep efficiency by interval number, with
# the y axis fixed to 0-100 and the panel titles hidden.
ggplot(
  data = actig_long_2wk,
  mapping = aes(x = interval_number, y = efficiency, group = ELS_ID)
) +
  geom_line(alpha = 0.5) +
  facet_wrap(~ELS_ID) +
  scale_y_continuous(limits = c(0, 100)) +
  labs(x = "Day", y = "Sleep Efficiency") +
  theme_minimal() +
  theme(strip.text.x = element_blank())

# Saves the last plot displayed.
ggsave("daily_act_sleep_eff_traj.png", width = 7, height = 7, dpi = 400)

6.2 Day to Day sleep dur

# One small panel per participant: sleep time in hours by interval number,
# with the panel titles hidden.
ggplot(
  data = actig_long_2wk,
  mapping = aes(x = interval_number, y = sleep_time_hrs, group = ELS_ID)
) +
  geom_line(alpha = 0.5) +
  facet_wrap(~ELS_ID) +
  labs(x = "Day", y = "Sleep Time (hrs)") +
  theme_minimal() +
  theme(strip.text.x = element_blank())

# Saves the last plot displayed.
ggsave("daily_act_sleep_time_traj.png", width = 7, height = 7, dpi = 400)

7 sleep instability?

Computing the RMSSD as per Koval et al. (2013) and Jahng et al. (2008). The RMSSD is the square root of the average of the squared differences between the measurement at occasion i and occasion i + 1 (Schoevers et al., 2020). Those papers apply it to affect; here it is applied to nightly sleep duration in hours.

# Night-to-night change in sleep duration, per participant.
# Output: ELS_ID plus the successive difference and its square, one row per
# interval (the first interval of each participant has NA differences).
#
# Rows are sorted chronologically within participant first, because lag() only
# gives "previous night" when the rows are in time order.
# mutate() is used for the row-wise differences; summarize() is meant to return
# one row per group.
# NOTE(review): differences are taken across all of a participant's intervals,
# including across timepoints if an id was recorded more than once -- confirm
# that is intended.
sleep_lag_df <-
  actig_long_2wk %>%
  arrange(ELS_ID, triggerdate, interval_number) %>%
  group_by(ELS_ID) %>%
  mutate(
    # step 1: computing successive difference
    sleep_hrs_succ_diff = sleep_time_hrs - lag(sleep_time_hrs),
    # step 2: computing square of each diff
    sleep_hrs_sq_succ_diff = sleep_hrs_succ_diff^2
  ) %>%
  dplyr::select(
    ELS_ID, sleep_hrs_succ_diff, sleep_hrs_sq_succ_diff
  )
## `summarise()` has grouped output by 'ELS_ID'. You can override using the `.groups` argument.
# Per-participant mean squared successive difference (MSSD) of sleep hours and
# its square root (RMSSD).
sleep_mssd_df <-
  sleep_lag_df %>%
  group_by(ELS_ID) %>%
  # step 3: averaging the squared differences (NA differences are ignored)
  summarize(
    mean_sq_sleep_hrs_succ_diff = mean(sleep_hrs_sq_succ_diff, na.rm = TRUE)
  ) %>%
  mutate(
    # step 4: computing square root of the average
    rmssd_sleep_hrs = sqrt(mean_sq_sleep_hrs_succ_diff),
    ELS_ID = factor(ELS_ID)
  )

7.0.0.1 joining dataframes

# Add the MSSD/RMSSD columns to the reduced table and move the recording start
# date next to the id.
actig_sleep <-
  actig_merged %>%
  left_join(sleep_mssd_df, by = "ELS_ID") %>%
  relocate(actigraphy_start_date, .after = ELS_ID)

7.1 summary stats

# Sample-level descriptives of the per-participant sleep summaries, written to
# actig_sleep_summary_stats.csv.
# NOTE(review): n counts rows of actig_sleep, which is larger than the number
# of participants if any id appears on more than one row.
actig_sleep_summary <-
  actig_sleep %>%
  summarize(
    n = n(),
    sleep_eff_mean = mean(eff_perc_mean),
    sleep_eff_sd = sd(eff_perc_mean),
    sleep_time_mean = mean(sleep_dur_mean),
    sleep_time_sd = sd(sleep_dur_mean),
    # The sd must be computed BEFORE the mean: the output column
    # sleep_time_hrs_mean reuses the input column's name, so anything after it
    # would see the single summary value and sd() of one value is NA.
    sleep_time_hrs_sd = sd(sleep_time_hrs_mean),
    sleep_time_hrs_mean = mean(sleep_time_hrs_mean)
  ) %>%
  # restore the original column order (mean before sd)
  relocate(sleep_time_hrs_sd, .after = sleep_time_hrs_mean)
write_csv(actig_sleep_summary, "actig_sleep_summary_stats.csv")

8 Data Viz

8.1 distributions of sleep eff

# mean
# Histogram of each row's mean sleep efficiency.
ggplot(actig_sleep, aes(x = eff_perc_mean)) +
  geom_histogram(color = "black", alpha = .5) +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# sd
# Histogram of each row's within-person sd of sleep efficiency.
ggplot(actig_sleep, aes(x = eff_perc_sd)) +
  geom_histogram(color = "black", alpha = .5) +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

8.2 distributions of sleep time

# mean
# Histogram of each row's mean sleep time in hours.
ggplot(actig_sleep, aes(x = sleep_time_hrs_mean)) +
  geom_histogram(color = "black", alpha = .5) +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# rmssd
# Histogram of the RMSSD of sleep hours.
ggplot(actig_sleep, aes(x = rmssd_sleep_hrs)) +
  geom_histogram(color = "black", alpha = .5) +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

8.2.1 writing out reduced and long form actigraphy dataframe

# Reduced table (actig_sleep: per-participant summaries plus RMSSD), saved as
# actig_merged.csv.
# NOTE(review): both paths are hard-coded to one user's Box folder.
write_csv(actig_sleep, "~/Box/Mooddata_Coordinating/1_Lab_Coordinating/Users/JackieSchwartz/Dissertation/0_MW_Act_Demo_Descriptives/actig_merged.csv")
# Long-form table, saved as actig_long.csv. Note that this is actig_long_2wk
# (intervals numbered below 15 only), not the full actig_long.
write_csv(actig_long_2wk, "~/Box/Mooddata_Coordinating/1_Lab_Coordinating/Users/JackieSchwartz/Dissertation/0_MW_Act_Demo_Descriptives/actig_long.csv")